In [1]:
# Importing the Libraries
import pandas as pd
import numpy as np
import plotly.express as px
In [2]:
# Loading the Dataset
data = pd.read_csv('deliverytime.txt')
In [3]:
# Analyse the Top 5 rows of the Dataset
data.head()
Out[3]:
ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Restaurant_latitude Restaurant_longitude Delivery_location_latitude Delivery_location_longitude Type_of_order Type_of_vehicle Time_taken(min)
0 4607 INDORES13DEL02 37 4.9 22.745049 75.892471 22.765049 75.912471 Snack motorcycle 24
1 B379 BANGRES18DEL02 34 4.5 12.913041 77.683237 13.043041 77.813237 Snack scooter 33
2 5D6D BANGRES19DEL01 23 4.4 12.914264 77.678400 12.924264 77.688400 Drinks motorcycle 26
3 7A6A COIMBRES13DEL02 38 4.7 11.003669 76.976494 11.053669 77.026494 Buffet motorcycle 21
4 70A2 CHENRES12DEL01 32 4.6 12.972793 80.249982 13.012793 80.289982 Snack scooter 30
In [4]:
# Check information about the Dataset
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45593 entries, 0 to 45592
Data columns (total 11 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   ID                           45593 non-null  object 
 1   Delivery_person_ID           45593 non-null  object 
 2   Delivery_person_Age          45593 non-null  int64  
 3   Delivery_person_Ratings      45593 non-null  float64
 4   Restaurant_latitude          45593 non-null  float64
 5   Restaurant_longitude         45593 non-null  float64
 6   Delivery_location_latitude   45593 non-null  float64
 7   Delivery_location_longitude  45593 non-null  float64
 8   Type_of_order                45593 non-null  object 
 9   Type_of_vehicle              45593 non-null  object 
 10  Time_taken(min)              45593 non-null  int64  
dtypes: float64(5), int64(2), object(4)
memory usage: 3.8+ MB
In [5]:
# Now let's check for any null values
data.isnull().sum()
Out[5]:
ID                             0
Delivery_person_ID             0
Delivery_person_Age            0
Delivery_person_Ratings        0
Restaurant_latitude            0
Restaurant_longitude           0
Delivery_location_latitude     0
Delivery_location_longitude    0
Type_of_order                  0
Type_of_vehicle                0
Time_taken(min)                0
dtype: int64
In [7]:
# Now let's check for any duplicated values
data.duplicated().sum()
Out[7]:
0
Calculating Distance Between Two Latitudes and Longitudes¶
The dataset doesn’t have any feature that shows the difference between the restaurant and the delivery location. All we have are the latitude and longitude points of the restaurant and the delivery location. We can use the haversine formula to calculate the distance between two locations based on their latitudes and longitudes¶
In [8]:
# Set the earth's radius (in kilometers)
R = 6371

# Convert degrees to radians
def deg_to_rad(degrees):
    return degrees * (np.pi/180)

# Function to calculate distance between two points using the haversine formula
def distcalculate(lat1, lon1, lat2, lon2):
    d_lat = deg_to_rad(lat2-lat1)
    d_lon = deg_to_rad(lon2-lon1)
    a = np.sin(d_lat/2)**2 + np.cos(deg_to_rad(lat1)) * np.cos(deg_to_rad(lat2)) * np.sin(d_lon/2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a))
    return R * c
In [9]:
# Calculate the distance between each pair of points
data['distance'] = np.nan
In [11]:
for i in range(len(data)):
    data.loc[i, 'distance'] = distcalculate(data.loc[i, 'Restaurant_latitude'], 
                                            data.loc[i, 'Restaurant_longitude'], 
                                            data.loc[i, 'Delivery_location_latitude'], 
                                            data.loc[i, 'Delivery_location_longitude'])
In [12]:
# Now let's check the new feature we have added in the dataset as distance
data.head()
Out[12]:
ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Restaurant_latitude Restaurant_longitude Delivery_location_latitude Delivery_location_longitude Type_of_order Type_of_vehicle Time_taken(min) distance
0 4607 INDORES13DEL02 37 4.9 22.745049 75.892471 22.765049 75.912471 Snack motorcycle 24 3.025149
1 B379 BANGRES18DEL02 34 4.5 12.913041 77.683237 13.043041 77.813237 Snack scooter 33 20.183530
2 5D6D BANGRES19DEL01 23 4.4 12.914264 77.678400 12.924264 77.688400 Drinks motorcycle 26 1.552758
3 7A6A COIMBRES13DEL02 38 4.7 11.003669 76.976494 11.053669 77.026494 Buffet motorcycle 21 7.790401
4 70A2 CHENRES12DEL01 32 4.6 12.972793 80.249982 13.012793 80.289982 Snack scooter 30 6.210138
Data Exploration¶
In [13]:
figure = px.scatter(data_frame= data,
                   x = 'distance',
                   y = 'Time_taken(min)',
                   size = 'Time_taken(min)',
                   trendline = 'ols',
                   title = 'Relationship Between Distance and Time Taken')

figure.show()
In [14]:
# Now let’s have a look at the relationship between the time taken to deliver the food and the age of the delivery partner
figure = px.scatter(data_frame = data, 
                    x="Delivery_person_Age",
                    y="Time_taken(min)", 
                    size="Time_taken(min)", 
                    color = "distance",
                    trendline="ols", 
                    title = "Relationship Between Time Taken and Age")
figure.show()
In [15]:
# Now let’s have a look at the relationship between the time taken to deliver the food and the ratings of the delivery partner
figure = px.scatter(data_frame = data, 
                    x="Delivery_person_Ratings",
                    y="Time_taken(min)", 
                    size="Time_taken(min)", 
                    color = "distance",
                    trendline="ols", 
                    title = "Relationship Between Time Taken and Ratings")
figure.show()
In [16]:
# Now let’s have a look at the type of food ordered by the customer and whether the type of vehicle used by the delivery partner affects the delivery time or not
fig = px.box(data, 
             x="Type_of_vehicle",
             y="Time_taken(min)", 
             color="Type_of_order")
fig.show()

Food Delivery Time Prediction Model¶

In [17]:
# Splitting the Dataset
from sklearn.model_selection import train_test_split
In [18]:
X = np.array(data[["Delivery_person_Age", 
                   "Delivery_person_Ratings", 
                   "distance"]])
y = np.array(data[["Time_taken(min)"]])
In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.10, random_state= 42)
In [21]:
# creating the LSTM neural network model
from keras.models import Sequential
from keras.layers import Dense, LSTM
In [22]:
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape = (X_train.shape[1], 1)))
model.add(LSTM(64, return_sequences= False))
model.add(Dense(25))
model.add(Dense(1))
model.summary()
C:\Users\Kamran\anaconda3\anaconda_new\Lib\site-packages\keras\src\layers\rnn\rnn.py:200: UserWarning:

Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.

Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ lstm (LSTM)                     │ (None, 3, 128)         │        66,560 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ lstm_1 (LSTM)                   │ (None, 64)             │        49,408 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 25)             │         1,625 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ (None, 1)              │            26 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 117,619 (459.45 KB)
 Trainable params: 117,619 (459.45 KB)
 Non-trainable params: 0 (0.00 B)
In [24]:
# training the model
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, batch_size=1, epochs=9)
Epoch 1/9
41033/41033 ━━━━━━━━━━━━━━━━━━━━ 103s 2ms/step - loss: 75.7650
Epoch 2/9
41033/41033 ━━━━━━━━━━━━━━━━━━━━ 107s 3ms/step - loss: 64.7484
Epoch 3/9
41033/41033 ━━━━━━━━━━━━━━━━━━━━ 93s 2ms/step - loss: 60.8735
Epoch 4/9
41033/41033 ━━━━━━━━━━━━━━━━━━━━ 91s 2ms/step - loss: 60.0647
Epoch 5/9
41033/41033 ━━━━━━━━━━━━━━━━━━━━ 88s 2ms/step - loss: 60.2912
Epoch 6/9
41033/41033 ━━━━━━━━━━━━━━━━━━━━ 89s 2ms/step - loss: 59.0659
Epoch 7/9
41033/41033 ━━━━━━━━━━━━━━━━━━━━ 94s 2ms/step - loss: 59.0924
Epoch 8/9
41033/41033 ━━━━━━━━━━━━━━━━━━━━ 88s 2ms/step - loss: 58.4746
Epoch 9/9
41033/41033 ━━━━━━━━━━━━━━━━━━━━ 89s 2ms/step - loss: 58.9557
Out[24]:
<keras.src.callbacks.history.History at 0x224194fdbe0>
In [25]:
# Now let’s test the performance of our model by giving inputs to predict the food delivery time:
print("Food Delivery Time Prediction")
a = int(input("Age of Delivery Partner: "))
b = float(input("Ratings of Previous Deliveries: "))
c = int(input("Total Distance: "))

features = np.array([[a, b, c]])
print("Predicted Delivery Time in Minutes = ", model.predict(features))
Food Delivery Time Prediction
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 456ms/step
Predicted Delivery Time in Minutes =  [[35.605858]]
In [ ]: